# -*- coding: utf-8 -*-
"""
Created on Tue Jan  9 10:25:57 2024

@author: 29165
"""

import requests
from bs4 import BeautifulSoup
import pandas as pd
#  <form name="listForm" id="listForm" method="post" action="/org.do?userType=1&orgCatalog=20">
# Listing page to scrape; the query string selects page 1 of the catalogue.
url = r"https://www.crcrfsp.com/org.do?userType=1&orgCatalog=20&pageNum=1&pageTotal=23280"
# NOTE(review): verify=False turns off TLS certificate validation (and makes
# urllib3 emit an InsecureRequestWarning). It is kept because the original
# relied on it -- confirm whether the site's certificate really requires it.
# The timeout matters: without one, requests waits indefinitely on a server
# that accepts the connection but never answers.
r = requests.get(url=url, verify=False, timeout=30)
print(r.status_code)
# Stop here with an explicit HTTP error instead of parsing an error page.
r.raise_for_status()
# Raw content of the scraped page
print(r.text)
# Parse the downloaded page
html = r.text
soup = BeautifulSoup(html, 'html.parser')
# Locate the form that wraps the full list of enterprise records
thread = soup.find(name='form', attrs={"name": "listForm", "id": "listForm",
                                       "method": "post",
                                       "action": "/org.do?userType=1&orgCatalog=20"})
if thread is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the find_all call below.
    raise RuntimeError(
        "listForm was not found in the response; the page layout may have "
        "changed or the request returned an unexpected page"
    )
# One list per output column
ids = []
names = []
codes = []
areas = []
items = thread.find_all(name="tr")

for item in items:
    elements = item.find_all(name="td")
    # Skip rows that cannot supply all four columns: rows with no <td> at all
    # (e.g. a header row built from <th> cells) and any shorter layout rows.
    if len(elements) < 4:
        continue
    # get_text() is used instead of .string because .string is None whenever
    # a cell has more than one child node; strip=True trims padding whitespace.
    ids.append(elements[0].get_text(strip=True))
    names.append(elements[1].get_text(strip=True))
    codes.append(elements[2].get_text(strip=True))
    areas.append(elements[3].get_text(strip=True))
all_imformation = [ids, names, codes, areas]
print(all_imformation)
# Export the collected columns to Excel, then read the file back.
# The path is a raw string: in a normal literal the backslashes form invalid
# escape sequences (\p, \s, ...), which newer Python versions warn about.
# The resulting path is character-for-character the same as before.
output_path = r'E:\微信公众号\程序\爬虫\上传版本\py文件\sample.xlsx'
# The four lists are rows at construction time; transpose so each becomes a column.
data = pd.DataFrame([ids, names, codes, areas]).T
data.columns = ['ids', 'names', 'codes', 'areas']
data.to_excel(output_path)
data2 = pd.read_excel(output_path)